This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# Hypothesis: Preseason rankings affect March Madness results
library(readxl)
library(readr)
getwd()
## [1] "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file"
# Build the data path explicitly instead of calling setwd(): the script no
# longer changes the session's working directory as a side effect, and the
# data location is stated in one place. The getwd() output above already
# reports this directory, so the same file is read as before.
data_dir <- "/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file"
# Preseason rankings table used by the rest of the analysis.
CFB_Data <- read.csv(file.path(data_dir, "CBB Preseason Rankings.csv"))
length(CFB_Data)
## [1] 4
head(CFB_Data)
dim(CFB_Data)
## [1] 250 4
summary(CFB_Data)
## Year Rank Team Round
## Min. :2009 Min. : 1 Length:250 Min. :0.000
## 1st Qu.:2011 1st Qu.: 7 Class :character 1st Qu.:1.000
## Median :2014 Median :13 Mode :character Median :2.000
## Mean :2014 Mean :13 Mean :2.436
## 3rd Qu.:2016 3rd Qu.:19 3rd Qu.:3.000
## Max. :2018 Max. :25 Max. :7.000
str(CFB_Data)
## 'data.frame': 250 obs. of 4 variables:
## $ Year : int 2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
## $ Rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Team : chr "DUKEDuke" "UVAVirginia" "UNCNorth Carolina" "GONZGonzaga" ...
## $ Round: int 4 7 3 4 5 3 4 3 6 3 ...
colSums(is.na(CFB_Data))
## Year Rank Team Round
## 0 0 0 0
names(CFB_Data)
## [1] "Year" "Rank" "Team" "Round"
CFB_Data
# Keep only the rows where the preseason rank is 1
rank1_data <- CFB_Data[which(CFB_Data$Rank == 1), ]
# Tally how many of those rows belong to each team
rank1_counts <- table(rank1_data$Team)
print(rank1_counts)
##
## DUKEDuke INDIndiana KUKansas UKKentucky
## 3 1 1 2
## UNCNorth Carolina VILLVillanova
## 2 1
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
# Restrict to rows with a preseason rank of 5 or better
top5_data <- CFB_Data[which(CFB_Data$Rank <= 5), ]
# Tabulate top-5 appearances per team, naming both columns at creation time
top5_counts <- as.data.frame(
  table(Team = top5_data$Team),
  responseName = "Count"
)
# Bar chart of appearances, teams ordered by ascending count
ggplot(data = top5_counts, mapping = aes(x = reorder(Team, Count), y = Count)) +
  geom_col(fill = "orange") +
  labs(
    title = "Number of Times Teams Ranked in Top 5",
    x = "Team",
    y = "Number of Times in Top 5"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
library(gridExtra)
# Filter for top 5 ranks
top5_data <- subset(CFB_Data, Rank <= 5)
# Count top-5 appearances for every Year x Team combination. table() fills
# combinations that never occurred with 0, so each team gets one row per year.
top5_yearly_counts <- as.data.frame(
  table(Year = top5_data$Year, Team = top5_data$Team),
  responseName = "Count"
)
# Teams, in order of first appearance in the counts table
teams <- unique(top5_yearly_counts$Team)
# Shared y-axis upper bound so all panels use the same scale; computed once
# here instead of being recomputed inside every plot.
max_count <- max(top5_yearly_counts$Count)
# Build one line chart per team
plots <- lapply(as.character(teams), function(team) {
  team_data <- top5_yearly_counts[top5_yearly_counts$Team == team, ]
  # Year is a factor here (it comes out of table()), so by default ggplot2
  # puts every year in its own group and geom_line() draws nothing, emitting
  # "Each group consists of only one observation". group = 1 joins all years
  # of the team into a single line.
  ggplot(team_data, aes(x = Year, y = Count, group = 1)) +
    geom_line() +
    geom_point() +
    scale_y_continuous(limits = c(0, max_count), breaks = 0:max_count) +
    labs(title = paste("Top 5 Rankings Over Years:", team))
})
# Choose a near-square grid large enough to hold every plot
num_of_plots <- length(plots)
num_of_cols <- ceiling(sqrt(num_of_plots))
num_of_rows <- ceiling(num_of_plots / num_of_cols)
# Arrange the plots into a grid; `grobs` takes the list directly, so the
# layout arguments no longer need to be spliced into it via do.call()
grid.arrange(grobs = plots, ncol = num_of_cols, nrow = num_of_rows)
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
# Pearson correlation between the Rank and Round columns of CFB_Data
correlation <- with(CFB_Data, cor(Rank, Round, method = "pearson"))
# Spearman's rank correlation of the same two columns, for the case where the
# data is not normally distributed or the relationship is not linear
correlation_spearman <- with(CFB_Data, cor(Rank, Round, method = "spearman"))
# Show the Pearson coefficient
correlation
## [1] -0.4589596
correlation_spearman
## [1] -0.4608207
library(ggplot2)
# Scatter of tournament round against preseason rank with a fitted lm line
ggplot(data = CFB_Data, mapping = aes(Rank, Round)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  xlab("Preseason Ranking") +
  ylab("Tournament Round Reached") +
  ggtitle("Correlation between Preseason Rankings and Tournament Performance")
## `geom_smooth()` using formula = 'y ~ x'
library(ggplot2)
# Thresholds: "lower-ranked" means a rank number above 15, and a "successful
# run" means a round number above 4
lower_rank_threshold <- 15
successful_run_threshold <- 4
# Scatter plot in which rows meeting both conditions are drawn in red
ggplot(data = CFB_Data, mapping = aes(x = Rank, y = Round)) +
  geom_point(
    mapping = aes(
      color = (Rank > lower_rank_threshold & Round > successful_run_threshold)
    )
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
  labs(
    title = "Highlighting Lower-Ranked Teams with Successful Tournament Runs",
    x = "Preseason Ranking",
    y = "Tournament Round Reached"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'
# Thresholds: "lower-ranked" means a rank number above 15, and a "successful
# run" means a round number above 4
lower_rank_threshold <- 15
successful_run_threshold <- 4
# Rows that satisfy both threshold conditions
is_underdog <- CFB_Data$Rank > lower_rank_threshold &
  CFB_Data$Round > successful_run_threshold
underdogs <- CFB_Data[which(is_underdog), ]
# Scatter plot with the qualifying rows in red, a fitted lm line, and a
# "Team, Year" text label next to each qualifying row
p <- ggplot(data = CFB_Data, mapping = aes(x = Rank, y = Round)) +
  geom_point(
    mapping = aes(
      color = (Rank > lower_rank_threshold & Round > successful_run_threshold)
    )
  ) +
  scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Highlighting Lower-Ranked Teams with Successful Tournament Runs",
    x = "Preseason Ranking",
    y = "Tournament Round Reached"
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  geom_text(
    data = underdogs,
    mapping = aes(label = paste(Team, Year, sep = ", ")),
    vjust = -1,
    hjust = 1,
    color = "red"
  )
# Render the finished plot
print(p)
## `geom_smooth()` using formula = 'y ~ x'
# Load necessary libraries
library(ggplot2)
library(readr)
# Read the annual performance data
annual_performance <- read_csv("/Users/shaunmckellarjr/Desktop/College Basketball Project/annual_performance.csv")
## Rows: 10 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): Year, Round
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Line-and-point chart of the Round column of annual_performance by Year
ggplot(data = annual_performance, mapping = aes(Year, Round)) +
  geom_line() +   # connect consecutive years
  geom_point() +  # mark each year's value
  theme_minimal() +
  ggtitle("Average Tournament Round Reached by Top 5 Preseason Ranked Teams") +
  xlab("Year") +
  ylab("Average Tournament Round Reached")
# Load the necessary libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Load your dataset
CFB_Data <- read_csv("/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv")
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean Round per Year for the rows whose Team is "DUKEDuke"
duke_data <- summarize(
  group_by(filter(CFB_Data, Team == "DUKEDuke"), Year),
  AverageRound = mean(Round)
)
# Line-and-point chart of that yearly mean
ggplot(data = duke_data, mapping = aes(Year, AverageRound)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  ggtitle("Yearly Tournament Performance Trend for Duke") +
  xlab("Year") +
  ylab("Average Tournament Round Reached")
# Load the necessary libraries
library(ggplot2)
library(dplyr)
library(readr)
# Load your dataset
CFB_Data <- read_csv("/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv")
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean Round for every Year x Rank combination, returned ungrouped
rank_performance_over_time <- summarize(
  group_by(CFB_Data, Year, Rank),
  AverageRound = mean(Round),
  .groups = "drop"
)
# One coloured line (with points) per year, plotted across preseason ranks
ggplot(
  data = rank_performance_over_time,
  mapping = aes(x = Rank, y = AverageRound, group = Year, color = factor(Year))
) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  labs(
    color = "Year",
    title = "Tournament Outcome Trends Based on Preseason Ranks Over Years",
    x = "Preseason Rank",
    y = "Average Tournament Round Reached"
  )
library(ggplot2)
library(dplyr)
library(readr)
# Load your dataset
CFB_Data <- read_csv("/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv")
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean Round per Year for the rows with preseason rank 1, returned ungrouped
rank1_data <- summarize(
  group_by(filter(CFB_Data, Rank == 1), Year),
  AverageRound = mean(Round),
  .groups = "drop"
)
# Line-and-point chart of the rank 1 yearly mean
ggplot(data = rank1_data, mapping = aes(Year, AverageRound)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  ggtitle("Yearly Tournament Performance Trend for Rank 1 Teams") +
  xlab("Year") +
  ylab("Average Tournament Round Reached")
# Median and interquartile range of Round for each preseason rank
rank_summaries <- summarize(
  group_by(CFB_Data, Rank),
  MedianRound = median(Round),
  IQR = IQR(Round),
  .groups = "drop"
)
# Display the per-rank summary table
print(rank_summaries)
## # A tibble: 25 × 3
## Rank MedianRound IQR
## <dbl> <dbl> <dbl>
## 1 1 4 1.75
## 2 2 4.5 4.25
## 3 3 3 2.5
## 4 4 4 1.75
## 5 5 3 2.5
## 6 6 3 3
## 7 7 3 1.75
## 8 8 3 1.75
## 9 9 4.5 2.75
## 10 10 2.5 2.75
## # ℹ 15 more rows
# Load the necessary libraries
library(ggplot2)
library(dplyr)
library(readr)
# Load your dataset
CFB_Data <- read_csv("/Users/shaunmckellarjr/Desktop/College Basketball Project/Excel file/CBB Preseason Rankings.csv")
## Rows: 250 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (3): Year, Rank, Round
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Make sure 'Year' is treated as numeric, which is necessary for regression
CFB_Data$Year <- as.numeric(CFB_Data$Year)
# Calculate the average round reached for each rank in each year
rank_performance <- CFB_Data %>%
  group_by(Year, Rank) %>%
  summarize(AverageRound = mean(Round), .groups = 'drop')
# Fit one linear model (AverageRound ~ Year) per preseason rank.
# lapply() builds the list in one pass instead of growing it inside a for
# loop, and the rank is matched with explicit base subsetting: the previous
# loop variable `rank` shadowed base::rank() and was looked up through dplyr's
# data mask, where a column of the same name would silently take precedence.
ranks <- unique(CFB_Data$Rank)
model_list <- lapply(ranks, function(current_rank) {
  # Kept as `model_data` so the Call line printed by summary() is unchanged
  model_data <- rank_performance[rank_performance$Rank == current_rank, ]
  lm(AverageRound ~ Year, data = model_data)
})
# Name each model after its rank so summaries print as $`1`, $`2`, ...
names(model_list) <- as.character(ranks)
# Summarize the models
model_summaries <- lapply(model_list, summary)
# Print the summaries for each rank
model_summaries
## $`1`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2485 -0.6500 -0.4000 0.8849 2.1697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -276.7697 325.9606 -0.849 0.421
## Year 0.1394 0.1619 0.861 0.414
##
## Residual standard error: 1.47 on 8 degrees of freedom
## Multiple R-squared: 0.08482, Adjusted R-squared: -0.02958
## F-statistic: 0.7414 on 1 and 8 DF, p-value: 0.4143
##
##
## $`2`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3939 -2.0379 0.0758 2.1288 2.5758
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -56.5151 534.0139 -0.106 0.918
## Year 0.0303 0.2652 0.114 0.912
##
## Residual standard error: 2.409 on 8 degrees of freedom
## Multiple R-squared: 0.001629, Adjusted R-squared: -0.1232
## F-statistic: 0.01305 on 1 and 8 DF, p-value: 0.9118
##
##
## $`3`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6364 -1.0864 -0.1273 0.7591 3.1454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -216.8545 433.6804 -0.500 0.631
## Year 0.1091 0.2154 0.506 0.626
##
## Residual standard error: 1.956 on 8 degrees of freedom
## Multiple R-squared: 0.03107, Adjusted R-squared: -0.09005
## F-statistic: 0.2565 on 1 and 8 DF, p-value: 0.6262
##
##
## $`4`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3879 -0.9742 -0.1727 0.8061 3.3879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -448.0121 402.3305 -1.114 0.298
## Year 0.2242 0.1998 1.122 0.294
##
## Residual standard error: 1.815 on 8 degrees of freedom
## Multiple R-squared: 0.136, Adjusted R-squared: 0.02802
## F-statistic: 1.259 on 1 and 8 DF, p-value: 0.2943
##
##
## $`5`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8 -1.3 -0.4 0.7 2.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -399.0000 390.3066 -1.022 0.337
## Year 0.2000 0.1938 1.032 0.332
##
## Residual standard error: 1.761 on 8 degrees of freedom
## Multiple R-squared: 0.1174, Adjusted R-squared: 0.007117
## F-statistic: 1.065 on 1 and 8 DF, p-value: 0.3324
##
##
## $`6`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3091 -0.3773 -0.1364 0.3106 2.2485
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1119.8788 292.7459 -3.825 0.00505 **
## Year 0.5576 0.1454 3.835 0.00498 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.321 on 8 degrees of freedom
## Multiple R-squared: 0.6477, Adjusted R-squared: 0.6037
## F-statistic: 14.71 on 1 and 8 DF, p-value: 0.004982
##
##
## $`7`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5515 -0.7212 0.1091 0.9318 1.8364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 197.84848 309.79870 0.639 0.541
## Year -0.09697 0.15386 -0.630 0.546
##
## Residual standard error: 1.398 on 8 degrees of freedom
## Multiple R-squared: 0.0473, Adjusted R-squared: -0.07178
## F-statistic: 0.3972 on 1 and 8 DF, p-value: 0.5461
##
##
## $`8`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7030 -0.7333 0.1030 0.4985 1.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 235.1576 260.0136 0.904 0.392
## Year -0.1152 0.1291 -0.892 0.399
##
## Residual standard error: 1.173 on 8 degrees of freedom
## Multiple R-squared: 0.09041, Adjusted R-squared: -0.02329
## F-statistic: 0.7952 on 1 and 8 DF, p-value: 0.3986
##
##
## $`9`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.08485 -1.24091 -0.00909 1.25455 2.76364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 345.6848 440.7478 0.784 0.455
## Year -0.1697 0.2189 -0.775 0.460
##
## Residual standard error: 1.988 on 8 degrees of freedom
## Multiple R-squared: 0.06988, Adjusted R-squared: -0.04639
## F-statistic: 0.601 on 1 and 8 DF, p-value: 0.4605
##
##
## $`10`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2667 -1.1500 0.1333 1.3500 2.2667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 539.5333 403.7160 1.336 0.218
## Year -0.2667 0.2005 -1.330 0.220
##
## Residual standard error: 1.821 on 8 degrees of freedom
## Multiple R-squared: 0.1811, Adjusted R-squared: 0.0787
## F-statistic: 1.769 on 1 and 8 DF, p-value: 0.2202
##
##
## $`11`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0000 -1.8833 0.2333 0.7500 4.2000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.13333 534.64091 0.256 0.804
## Year -0.06667 0.26553 -0.251 0.808
##
## Residual standard error: 2.412 on 8 degrees of freedom
## Multiple R-squared: 0.007818, Adjusted R-squared: -0.1162
## F-statistic: 0.06304 on 1 and 8 DF, p-value: 0.8081
##
##
## $`12`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.76364 -0.46667 -0.00606 0.50909 2.04242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -193.04848 257.85696 -0.749 0.475
## Year 0.09697 0.12806 0.757 0.471
##
## Residual standard error: 1.163 on 8 degrees of freedom
## Multiple R-squared: 0.06688, Adjusted R-squared: -0.04976
## F-statistic: 0.5733 on 1 and 8 DF, p-value: 0.4706
##
##
## $`13`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6182 -1.3318 -0.6242 1.1621 3.3697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.00606 387.05004 0.070 0.946
## Year -0.01212 0.19223 -0.063 0.951
##
## Residual standard error: 1.746 on 8 degrees of freedom
## Multiple R-squared: 0.0004968, Adjusted R-squared: -0.1244
## F-statistic: 0.003976 on 1 and 8 DF, p-value: 0.9513
##
##
## $`14`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0485 -0.9530 -0.4424 1.1954 2.9091
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -83.52121 373.83957 -0.223 0.829
## Year 0.04242 0.18567 0.228 0.825
##
## Residual standard error: 1.686 on 8 degrees of freedom
## Multiple R-squared: 0.006484, Adjusted R-squared: -0.1177
## F-statistic: 0.05221 on 1 and 8 DF, p-value: 0.825
##
##
## $`15`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9394 -0.6000 -0.1485 0.9803 1.6424
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -193.64848 267.21599 -0.725 0.489
## Year 0.09697 0.13271 0.731 0.486
##
## Residual standard error: 1.205 on 8 degrees of freedom
## Multiple R-squared: 0.06256, Adjusted R-squared: -0.05462
## F-statistic: 0.5339 on 1 and 8 DF, p-value: 0.4858
##
##
## $`16`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8606 0.1273 0.2000 0.2545 1.3091
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.61212 242.22348 0.209 0.840
## Year -0.02424 0.12030 -0.202 0.845
##
## Residual standard error: 1.093 on 8 degrees of freedom
## Multiple R-squared: 0.005051, Adjusted R-squared: -0.1193
## F-statistic: 0.04061 on 1 and 8 DF, p-value: 0.8453
##
##
## $`17`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39394 -0.73636 -0.05152 0.79394 1.41818
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -376.9939 237.5679 -1.587 0.151
## Year 0.1879 0.1180 1.592 0.150
##
## Residual standard error: 1.072 on 8 degrees of freedom
## Multiple R-squared: 0.2407, Adjusted R-squared: 0.1458
## F-statistic: 2.536 on 1 and 8 DF, p-value: 0.15
##
##
## $`18`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9273 -1.7636 -0.8455 1.9909 4.3636
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 149.03636 596.70259 0.250 0.809
## Year -0.07273 0.29635 -0.245 0.812
##
## Residual standard error: 2.692 on 8 degrees of freedom
## Multiple R-squared: 0.007472, Adjusted R-squared: -0.1166
## F-statistic: 0.06023 on 1 and 8 DF, p-value: 0.8123
##
##
## $`19`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0242 -0.2667 -0.1000 0.3697 2.2182
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -242.4606 284.6214 -0.852 0.419
## Year 0.1212 0.1414 0.857 0.416
##
## Residual standard error: 1.284 on 8 degrees of freedom
## Multiple R-squared: 0.08418, Adjusted R-squared: -0.0303
## F-statistic: 0.7353 on 1 and 8 DF, p-value: 0.4161
##
##
## $`20`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9454 -0.4273 -0.2242 0.2273 2.9939
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -376.1939 332.4069 -1.132 0.291
## Year 0.1879 0.1651 1.138 0.288
##
## Residual standard error: 1.499 on 8 degrees of freedom
## Multiple R-squared: 0.1393, Adjusted R-squared: 0.03175
## F-statistic: 1.295 on 1 and 8 DF, p-value: 0.288
##
##
## $`21`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1091 -0.9818 -0.5000 0.7500 2.0606
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.81212 292.74588 0.170 0.869
## Year -0.02424 0.14539 -0.167 0.872
##
## Residual standard error: 1.321 on 8 degrees of freedom
## Multiple R-squared: 0.003463, Adjusted R-squared: -0.1211
## F-statistic: 0.0278 on 1 and 8 DF, p-value: 0.8717
##
##
## $`22`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8182 -0.4394 -0.2121 0.3485 1.3939
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 428.60606 171.49509 2.499 0.0370 *
## Year -0.21212 0.08517 -2.490 0.0375 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7736 on 8 degrees of freedom
## Multiple R-squared: 0.4367, Adjusted R-squared: 0.3663
## F-statistic: 6.203 on 1 and 8 DF, p-value: 0.03749
##
##
## $`23`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.21818 -0.40909 -0.10000 0.05909 1.49091
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -474.81818 196.57886 -2.415 0.0422 *
## Year 0.23636 0.09763 2.421 0.0418 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8868 on 8 degrees of freedom
## Multiple R-squared: 0.4229, Adjusted R-squared: 0.3507
## F-statistic: 5.861 on 1 and 8 DF, p-value: 0.04178
##
##
## $`24`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.467 -0.950 -0.200 0.850 1.667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 135.53333 268.46694 0.505 0.627
## Year -0.06667 0.13333 -0.500 0.631
##
## Residual standard error: 1.211 on 8 degrees of freedom
## Multiple R-squared: 0.0303, Adjusted R-squared: -0.09091
## F-statistic: 0.25 on 1 and 8 DF, p-value: 0.6305
##
##
## $`25`
##
## Call:
## lm(formula = AverageRound ~ Year, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1091 -0.7000 -0.3818 0.8909 1.3455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -181.34545 213.46607 -0.850 0.420
## Year 0.09091 0.10602 0.857 0.416
##
## Residual standard error: 0.963 on 8 degrees of freedom
## Multiple R-squared: 0.08418, Adjusted R-squared: -0.0303
## F-statistic: 0.7353 on 1 and 8 DF, p-value: 0.4161
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Create a ggplot object.
# The hover `text` aesthetic is mapped on the point layer only. When it sits in
# the plot-level aes() it is inherited by geom_smooth() as well, and because it
# is a discrete (character) variable ggplot2 uses it for default grouping, so
# the regression would be split into one group per "Team, Year" label instead
# of a single fit over all points.
# NOTE: ggplot2 reports "Ignoring unknown aesthetics: text" for the point
# layer; that is expected, since `text` is only consumed by ggplotly().
p <- ggplot(CFB_Data, aes(x = Rank, y = Round)) +
  geom_point(aes(
    color = (Rank > lower_rank_threshold & Round > successful_run_threshold),
    text = paste(Team, Year, sep = ", ")
  )) +
  scale_color_manual(values = c('FALSE' = 'black', 'TRUE' = 'red')) +
  geom_smooth(method = "lm", se = FALSE, color = 'blue') +
  labs(x = "Preseason Ranking", y = "Tournament Round Reached",
       title = "Highlighting Lower-Ranked Teams with Successful Tournament Runs") +
  theme_minimal()
# Convert the ggplot object to a Plotly object, showing only `text` on hover
p_plotly <- ggplotly(p, tooltip = "text")
## `geom_smooth()` using formula = 'y ~ x'
# Print the Plotly plot
p_plotly